Build merged Seurat Object of the three patients: s113, s116,
s118
MT: medial region of the tibia (affected)
OLT: outer lateral region of the tibia (healthy)
MT1.data <-Read10X(data.dir =paste0(wd,"MT_113"))
MT1<- CreateSeuratObject(counts = MT1.data,
project = "MT1", min.cells = 300, min.features = 500)
MT2.data <-Read10X(data.dir = paste0(wd,"MT_116"))
MT2<- CreateSeuratObject(counts = MT2.data,
project = "MT2", min.cells = 300, min.features = 500)
MT3.data <-Read10X(data.dir = paste0(wd,"MT_118"))
MT3<- CreateSeuratObject(counts = MT3.data,
project = "MT3", min.cells = 300, min.features = 500)
olt1.data <-Read10X(data.dir = paste0(wd,"olt_113"))
olt1<- CreateSeuratObject(counts = olt1.data,
project = "olt1", min.cells = 300, min.features = 500)
olt2.data <-Read10X(data.dir = paste0(wd,"olt_116"))
olt2<- CreateSeuratObject(counts = olt2.data,
project = "olt2", min.cells = 300, min.features = 500)
olt3.data <-Read10X(data.dir = paste0(wd,"olt_118"))
olt3<- CreateSeuratObject(counts = olt3.data,
project = "olt3", min.cells = 300, min.features = 500)
tot113 <- merge(MT1, y = olt1, add.cell.ids = c("MT_s113", "Olt_s113"), project = "tot113")
length(Cells(tot113))
tot116 <- merge(MT2, y = olt2, add.cell.ids = c("MT_s116", "Olt_s116"), project = "tot116")
length(Cells(tot116))
tot118 <- merge(MT3, y = olt3, add.cell.ids = c("MT_s118", "Olt_s118"), project = "tot118")
length(Cells(tot118))
pt_list <-list(tot113,tot116,tot118)
names(pt_list) <-c("tot113","tot116","tot118")
Pre-processing
for( i in 1:length(pt_list)){
counts <- GetAssayData(object = pt_list[[i]], slot = "counts")
nonzero <- counts > 0
keep_genes <- Matrix::rowSums(nonzero) >= 100
filtered_counts <- counts[keep_genes, ]
filtered_seurat <- CreateSeuratObject(filtered_counts, meta.data = pt_list[[i]]@meta.data)
tmp <-filtered_seurat
tmp[["percent.mt"]] <- PercentageFeatureSet(tmp , pattern = "^MT-")
VlnPlot(tmp, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
plot1 <- FeatureScatter(tmp, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(tmp, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
# plot1 + plot2, visualize to set nFeature_RNA, nCount_RNA and percent.mt thresholds
if(i == 1) tmp <- subset(tmp, subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mt < 4 & nCount_RNA < 50000)
if(i == 2) tmp <- subset(tmp, subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 5 & nCount_RNA < 50000)
if(i == 3) tmp <- subset(tmp, subset = nFeature_RNA > 200 & nFeature_RNA < 6000 & percent.mt < 3 & nCount_RNA < 40000)
tmp<- NormalizeData(tmp)
tmp1<- FindVariableFeatures(tmp, selection.method = "vst", nfeatures = 8500)
pt_list[[i]] <-tmp1
}
Integrate patient data
features <- SelectIntegrationFeatures(pt_list, nfeatures = 8500)
anchors <- FindIntegrationAnchors(pt_list, anchor.features=features)
OAtot <-IntegrateData(anchors, new.assay.name = "integrate")
Examine and visualize PCA
all.genes <- rownames(OAtot)
OAtot <- ScaleData(OAtot, features = all.genes)
OAtot <- RunPCA(OAtot, features = VariableFeatures(object = OAtot))
print(OAtot[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1
## Positive: S100A4, COL1A2, CRTAC1, HTRA1, COL3A1
## Negative: FRZB, CFH, JUNB, ACAN, COL9A3
## PC_ 2
## Positive: CHI3L2, NNMT, CHI3L1, CLU, RPL13
## Negative: FGFBP2, S100A1, SCRG1, C2orf82, CHAD
## PC_ 3
## Positive: COMP, OMD, PRELP, CILP, SMOC2
## Negative: LMNA, COL1A1, VIM, NBL1, B2M
## PC_ 4
## Positive: IBSP, COL10A1, SPP1, MT-ND3, SOD3
## Negative: COL2A1, EEF1A1, RPL3, CILP2, RPS2
## PC_ 5
## Positive: RPL23A, RPS16, RPL37, RPL37A, RPLP2
## Negative: CLU, ITM2B, LUM, DCN, PDIA3
Run UMAP: 10 PC chosen
VizDimLoadings(OAtot, dims = 1:2, reduction = "pca")

DimPlot(OAtot, reduction = "pca")

DimHeatmap(OAtot, dims = 1:15, cells = 500, balanced = TRUE)

ElbowPlot(OAtot) ###9-12 PC

OAtot <- FindNeighbors(OAtot, dims = 1:10)
OAtot <- FindClusters(OAtot, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 23752
## Number of edges: 690120
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8396
## Number of communities: 9
## Elapsed time: 13 seconds
OAtot <- RunUMAP(OAtot, dims = 1:10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
Save UMAP coordinates for reproducibility
umapCoord <- Embeddings(object = OAtot[["umap"]])
save(umapCoord,file=paste0(MEDIAsave,"CoordUMAP.RData"))
Visualize UMAP for: clusterization, class, single patient
load(paste0(MEDIAsave,"CoordUMAP.RData"))
OAtot@meta.data$type <-sapply(strsplit(rownames(OAtot@meta.data),"_"), function(x) x[1])
OAtot@meta.data$patient <-sapply(strsplit(rownames(OAtot@meta.data),"_"), function(x) x[2])
OAtot@reductions[["umap"]]@cell.embeddings <- umapCoord
DimPlot(OAtot, reduction = "umap", label = TRUE, label.size = 7)+
ggtitle("Clustering")+
theme(plot.title = element_text(hjust = 0.5))

DimPlot(OAtot, reduction = "umap", group.by = "type", label = TRUE,
label.size = 7,cols= c("turquoise","orangered"))+
ggtitle("Class")

DimPlot(OAtot, reduction = "umap", group.by = "patient")+
ggtitle("Patient")

Pseudo-bulk singleCell data trasformation
ei <-OAtot@meta.data[,c(1,6,7,8)]
head(ei)
## orig.ident seurat_clusters type patient
## MT_s113_AAACCTGGTAGCACGA-1 MT1 2 MT s113
## MT_s113_AAACGGGAGGCAGGTT-1 MT1 2 MT s113
## MT_s113_AAACGGGCAAATACAG-1 MT1 2 MT s113
## MT_s113_AAACGGGCAAGAGTCG-1 MT1 0 MT s113
## MT_s113_AAACGGGCACCGCTAG-1 MT1 0 MT s113
## MT_s113_AAACGGGCAGTCTTCC-1 MT1 0 MT s113
ei$clus <-0 #merge in unique
OA_sce <-as.SingleCellExperiment(OAtot, assay = "RNA")
OA_sce$clus <-0
(OA_sce2 <- prepSCE(OA_sce,
kid = "seurat_clusters",
gid = "type",
sid = "orig.ident",
drop = FALSE))
## class: SingleCellExperiment
## dim: 10060 23752
## metadata(1): experiment_info
## assays(2): counts logcounts
## rownames(10060): AP006222.2 SAMD11 ... LCA5L LINC00205
## rowData names(0):
## colnames(23752): MT_s113_AAACCTGGTAGCACGA-1 MT_s113_AAACGGGAGGCAGGTT-1
## ... Olt_s118_TTTGTCAGTGCACGAA-1 Olt_s118_TTTGTCATCCTAAGTG-1
## colData names(10): cluster_id sample_id ... ident clus
## reducedDimNames(2): PCA UMAP
## mainExpName: RNA
## altExpNames(0):
OA_sce2$cond <-paste0(OA_sce2$sample_id,"_",OA_sce2$patient)
experiment_info <- data.frame(sample_id=as.factor(c("MT1","MT2","MT3","olt1","olt2","olt3")),
cond=as.factor(c("MT1_s113 ","MT2_s116",
"MT3_s118","olt1_s113","olt2_s116","olt3_s118")),
patient=as.factor(rep(c("s113","s116","s118"),2)),
group_id=as.factor(c("MT","MT","MT","Olt","Olt","Olt")),
n_cells=as.numeric(table(ei$orig.ident)))
OA_sce2@metadata <- list(experiment_info)
names(OA_sce2@metadata) <-"experiment_info"
table(OA_sce2$cluster_id,OA_sce2$sample_id)
##
## MT1 MT2 MT3 olt1 olt2 olt3
## 0 1733 903 2477 223 321 325
## 1 64 27 62 2260 1135 2037
## 2 897 442 1189 101 67 70
## 3 2 13 81 1350 933 378
## 4 0 0 12 858 895 633
## 5 18 134 42 1115 259 474
## 6 243 693 377 16 5 66
## 7 51 486 4 2 0 20
## 8 22 0 22 155 46 14
pb <- aggregateData(OA_sce2, assay = "counts", fun = "sum")
colData(pb)
## DataFrame with 6 rows and 4 columns
## group_id patient clus cond
## <factor> <character> <numeric> <character>
## MT1 MT s113 0 MT1_s113
## MT2 MT s116 0 MT2_s116
## MT3 MT s118 0 MT3_s118
## olt1 Olt s113 0 olt1_s113
## olt2 Olt s116 0 olt2_s116
## olt3 Olt s118 0 olt3_s118
plotPCA(OA_sce2, colour_by = "group_id")
